#------------------------------------------------------------------------------
# create appendix tables s2
#==============================================================================

load_library = c('bit64','data.table','fst','future.apply','stringr','logger','vroom')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))

bucket = '/N/project/iuni_doctorshopping/'

extract_date_from_filename = function(outfile){
  date = str_extract(outfile,"\\d{4}-\\d{2}-\\d{2}")
  return(date)
}

extract_week_from_filename = function(outfile){
  date = str_extract(outfile,"\\d{4}W\\d{2}")
  date = gsub(' ','_',date)
  return(date)
}

process_period = function(ind_combined){
	target_week = ind_combined[year==2020, unique(week)] %>% sort
	ind_combined = ind_combined[week %in% target_week, ]
	
	ind_combined[week %in% 1:10, period := 1L]
	ind_combined[week %in% 11:27, period := 2L]
	ind_combined[week %in% 28:40, period := 3L]
	return(ind_combined)	
}

# ___ opioid prescriptions 
files_oprx = dir(file.path(bucket, 'derived_v4_202101','weekly', 'rx_reduced', 'ses', 'opioid'), full.names = TRUE)
files_pain = dir(file.path(bucket, 'derived_v4_202101','weekly', 'patient_list', 'ses', 'pain'), full.names = TRUE,
	pattern=c('backpain|neckpain|limbpain'))

# __ combine pain patients 
dt_pain = lapply(files_pain, function(ff){
	dt = fread(ff)
	week_year = extract_week_from_filename(ff)
	year = as.integer(strsplit(week_year, 'W')[[1]][1])
	week = as.integer(strsplit(week_year, 'W')[[1]][2])
	dt$year = year 
	dt$week = week
	return(dt)
}) %>% rbindlist

# __ combine op prescription data
dt_oprx = lapply(files_oprx, function(ff){
	dt = fread(ff)
	dt = dt[, .(n_rx = .N), by = c('PATID','NDC')]
	week_year = extract_week_from_filename(ff)
	year = strsplit(week_year, 'W')[[1]][1]
	week = strsplit(week_year, 'W')[[1]][2]
	dt$year = year 
	dt$week = week
	return(dt)
}) %>% rbindlist

dt_oprx[, week := as.integer(week)]
dt_oprx[, year := as.integer(year)]
dt_oprx = process_period(dt_oprx)

# __ filter based on pain patients 
dt_oprx = merge(dt_oprx, dt_pain, by=c('PATID','year','week'))

# load drug info
drugs_all = fread(file.path(bucket,"additional_data","drug.tsv")) # use NDC for matching

# merge data sets
dt_oprx_class = merge(dt_oprx, drugs_all[Class == 'Opioid',c('NDC_Numeric','Drug','Generic_Drug_Name')], by.x='NDC',  by.y='NDC_Numeric')

# ___ table s2. 
table_s2_tab = dt_oprx_class[, .(n_patient = uniqueN(PATID)), by=c('Generic_Drug_Name','year','period')]
table_s2_tab = dcast(table_s2_tab, Generic_Drug_Name ~ year + period, value.var='n_patient')
setorder(table_s2_tab, -`2019_1`)

rio::export(table_s2_tab, file.path(bucket,'projects','covid_opioid','table_s2_opioid_list.xlsx'))
